Tidy Tuesday Week 5 2022

Danielle Barnas

2/1/2022


Load Libraries

library(tidyverse)
library(tidytuesdayR)
library(here)
library(PNWColors)
library(plotly)
library(ggmap)
library(sp)
library(maps)
library(maptools)

### Google API
# Read the key as plain text instead of parsing the file as a table and taking
# its header name; this also keeps the key out of any column-specification
# message. Assumes API.txt holds only the key on its first line -- TODO confirm.
API <- trimws(readLines("API.txt", n = 1L, warn = FALSE))
# fail early with a clear error if the file was empty or started with a blank line
stopifnot(length(API) == 1L, nzchar(API))
register_google(key = API) ### uses my API in separate txt file

Bring in Data

# Download the airmen csv from the rfordatascience/tidytuesday GitHub repository (2022-02-08 data folder)
airmen <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-02-08/airmen.csv')
## Rows: 1006 Columns: 16
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (13): name, last_name, first_name, rank_at_graduation, class, graduated...
## dbl   (1): number_of_aerial_victory_credits
## dttm  (2): graduation_date, reported_lost_date
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# preview the first rows to check the column names and types
head(airmen)
## # A tibble: 6 x 16
##   name           last_name first_name graduation_date     rank_at_graduat~ class
##   <chr>          <chr>     <chr>      <dttm>              <chr>            <chr>
## 1 Adams, John H~ Adams     John H., ~ 1945-04-15 00:00:00 2nd Lt           SE-4~
## 2 Adams, Paul    Adams     Paul       1943-04-29 00:00:00 2nd Lt           SE-4~
## 3 Adkins, Ruthe~ Adkins    Rutherfor~ 1944-10-16 00:00:00 2nd Lt           SE-4~
## 4 Adkins, Winst~ Adkins    Winston A. 1944-02-08 00:00:00 2nd Lt           TE-4~
## 5 Alexander, Ha~ Alexander Halbert L. 1944-11-20 00:00:00 2nd Lt           SE-4~
## 6 Alexander, Ha~ Alexander Harvey R.  1944-04-15 00:00:00 2nd Lt           TE-4~
## # ... with 10 more variables: graduated_from <chr>, pilot_type <chr>,
## #   military_hometown_of_record <chr>, state <chr>,
## #   aerial_victory_credits <chr>, number_of_aerial_victory_credits <dbl>,
## #   reported_lost <chr>, reported_lost_date <dttm>,
## #   reported_lost_location <chr>, web_profile <chr>

Process for GGMap

Retrieve Lat and Lon

# Build the character vector of distinct "hometown, state" strings that is
# handed to geocode.
# NOTE(review): unite() defaults to na.rm = FALSE, so a missing hometown/state
# presumably becomes the text "NA" and drop_na() removes nothing -- confirm.
# The vector length must keep matching the cached coordinate file read below.
stateID <- airmen %>%
  unite(
    col = state,
    military_hometown_of_record, state,
    sep = ", ",
    remove = TRUE
  ) %>%
  distinct(state) %>%
  drop_na() %>%
  as_vector()

# geocode() looks up the longitude and latitude of every "hometown, state"
# string in stateID; output = "latlon" keeps just the lon and lat columns

#stateIDloc <- geocode(location = stateID, output = "latlon", source = "google")
# write the result to csv so geocode() does not have to be run again
#write_csv(stateIDloc, here("2022_Week6","Data","state_latlon.csv")) 

# read in the cached csv of lat and lon for hometowns
# (its rows are bound to stateID by position below, so the order must match stateID)
stateIDloc <- read_csv(here("2022_Week6","Data","state_latlon.csv"))
## Rows: 390 Columns: 2
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (2): lon, lat
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# bind geocode to stateID
# cbind() pairs names and coordinates purely by position, and a vector whose
# length is a whole fraction of the row count would be recycled silently, so
# check that the cached coordinates line up with the names first
stopifnot(length(stateID) == nrow(stateIDloc))

stateID <- stateID %>%
  cbind(stateIDloc) %>% # the piped vector becomes a column named "."
  rename(state = ".")


# join lat lon to df
# first collapse hometown and state into a single "hometown, state" key
# (stored in `state`) so it has the same form as the geocoded strings
airmen <- unite(
  airmen,
  col = state,
  military_hometown_of_record, state,
  sep = ", ",
  remove = TRUE
)

# Attach a per-hometown graduate count (n) and the geocoded lon/lat to every
# airman record, then drop records that cannot be labelled
air_full <- airmen %>%
  count(state) %>%
  as_tibble() %>%
  right_join(airmen, by = "state") %>% # rejoin after count(), key stated explicitly
  left_join(stateID, by = "state") %>%
  drop_na(state, rank_at_graduation, name) %>% # remove na's from columns of interest
  filter(
    rank_at_graduation != "N/A",
    # `state` holds "hometown, state" strings at this point, so it can never be
    # exactly "Unk"; test the state part at the end of the string instead
    !str_ends(state, ", Unk")
  ) %>%
  mutate(rank_at_graduation = str_replace_all(
    string = rank_at_graduation,
    pattern = "Capt$", # `$` anchors the match to the end of the string
    replacement = "Captain" # replace values for consistency
  ))
## Joining, by = "state"
# preview the joined data (count n, airman details, lon and lat)
head(air_full)
## # A tibble: 6 x 18
##   state        n name  last_name first_name graduation_date     rank_at_graduat~
##   <chr>    <int> <chr> <chr>     <chr>      <dttm>              <chr>           
## 1 Ahoskie~     3 Reyn~ Reynolds  Clarence ~ 1945-08-04 00:00:00 Flight Officer  
## 2 Ahoskie~     3 Smit~ Smith     Graham     1942-07-03 00:00:00 2nd Lt          
## 3 Ahoskie~     3 Smit~ Smith     Reginald ~ 1945-08-04 00:00:00 Flight Officer  
## 4 Akron, ~     1 McCl~ McClenic  William B~ 1943-08-30 00:00:00 2nd Lt          
## 5 Albany,~     3 Blay~ Blaylock  Joseph E.  1945-06-27 00:00:00 2nd Lt          
## 6 Albany,~     3 Hall~ Hall      Richard W. 1943-07-28 00:00:00 2nd Lt          
## # ... with 11 more variables: class <chr>, graduated_from <chr>,
## #   pilot_type <chr>, aerial_victory_credits <chr>,
## #   number_of_aerial_victory_credits <dbl>, reported_lost <chr>,
## #   reported_lost_date <dttm>, reported_lost_location <chr>, web_profile <chr>,
## #   lon <dbl>, lat <dbl>

Create google map

# Base map for the plot: watercolor map type, located on "US", zoom level 4
USmap <- get_map(
  location = "US",
  zoom = 4,
  maptype = "watercolor"
)

Create plotly text

I want to show graduate names, grouped by graduation rank, when I hover over a GPS point

# Remove unnecessary columns and append the graduation year, in parentheses,
# to each name (e.g. "Adams, Paul (1943)")
air_full <- air_full %>%
  select(-c(
    pilot_type,
    aerial_victory_credits,
    number_of_aerial_victory_credits,
    reported_lost,
    reported_lost_date,
    reported_lost_location,
    web_profile
  )) %>%
  # a missing date is rendered as "(NA)"
  mutate(name = paste0(name, " (", format(graduation_date, "%Y"), ")")) %>%
  select(-graduation_date) # the year now lives in the name column

# total distinct states in dataframe
nstates <- as.numeric(nrow(stateID))

# create df of single vector names by state and graduation rank:
# one row per hometown ("state") and rank, with every graduate name for that
# pair flattened into one "__"-delimited string.
# A single grouped mutate() builds all rows in one pass, so nothing is grown
# row by row and an empty stateID simply yields an empty result.
gradText <- air_full %>%
  filter(state %in% stateID$state) %>% # only hometowns present in the lookup table
  group_by(state, rank_at_graduation) %>%
  mutate(text = str_flatten(string = as.vector(name), collapse = "__")) %>% # combine rows into one character vector string
  ungroup() %>%
  distinct(state, text, rank_at_graduation) %>%
  drop_na() %>%
  arrange(match(state, stateID$state)) # keep hometowns in the order they appear in stateID

# create df with all names grouped by graduation rank per state:
# one row per hometown holding the full hover label plus its lon/lat
air_text <- gradText %>%
  mutate(text = str_replace_all(string = text, pattern = "__", replacement = "\n ")) %>% # replace double underscore with new line
  # add lat and lon columns; the key is explicit so the join cannot silently
  # pick up extra shared columns
  left_join(stateID, by = "state") %>%
  unite(col = rank_text, rank_at_graduation, text, sep = ": \n", remove = TRUE) %>% # "rank: names" block
  group_by(state) %>%
  mutate(rank_text = str_flatten(string = as.vector(rank_text), collapse = "\n \n")) %>% # bring all rows together to unite full state graduates
  ungroup() %>%
  unite(col = state_rank_text, state, rank_text, sep = "\n \n", remove = TRUE) %>% # prefix the label with the hometown
  distinct() # every row of a hometown now carries the same label, so collapse the duplicates
## Joining, by = "state"

Map

Use zoom and pan functions along the top bar to navigate clumped cities

# Static ggmap first: base map, one point per row of air_text, then labels.
# The `text` aesthetic is included so that ggplotly() can use it as the hover
# label once the plot is converted.
staticMap <- ggmap(USmap) +
  geom_point(
    data = air_text,
    mapping = aes(x = lon, y = lat, text = state_rank_text),
    color = "#05445e",
    size = 2
  ) +
  labs(
    title = "Airforce Graduates by Rank and Hometown",
    caption = "Watercolor Plotly ggMap",
    x = "Longitude",
    y = "Latitude"
  )

# Convert to an interactive plotly map whose tooltip shows only the `text` aesthetic
ggplotly(staticMap, tooltip = "text")